/*
 * REGEXP.C -- C/C++ source, dated 1993-04-08 (7KB, 285 lines).
 *
 * Archive path: Software Vault: The Gold Collection /
 *   Software Vault - The Gold Collection (American Databankers) (1993).ISO /
 *   cdr49 / actlib13.zip / STRINGS.ZIP / REGEXP.C
 */
/* Copyright (C) 1993 Marc Stern (internet: stern@mble.philips.be) */
#include "strings.h"
#include <stdlib.h>
/*
Functions : matchset
match
recursexp
regexp
*/
/***
 *  Function    :  matchset
 *
 *  Description :  Test if a character matches a set expression
 *                 (the text found between '[' and ']').
 *
 *  Parameters  :  in   char c          character to be matched
 *                 in   char *pattern   set expression (read only)
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 \  quote next character
 *                 -  range of values (between two characters)
 *                 ^  non-inclusion (if first character)
 *
 *                 A '-' that is first or last in the set, and a '\'
 *                 that is last in the set, stand for themselves.
 *                 Range bounds may be given in either order.
 *
 *                 ex:  aeiou0-9   match a, e, i, o, u, and 0 thru 9
 *                      ^aeiou0-9  match anything but a, e, i, o, u,
 *                                 and 0 thru 9
 *
 *  Return      :  1 or 0.  The NUL character never matches (0),
 *                 even against a negated set.
 *
 *  OS/Compiler :  All
 ***/
int matchset( char c, char *pattern )
{
    const char *prev;               /* element seen just before *pattern   */
    int match_ok = 1;               /* value returned when c is in the set */

    if ( ! c ) return 0;

    if ( *pattern == '^' )
    {
        match_ok = 0;
        pattern ++;
    }

    for ( prev = pattern; *pattern; prev = pattern++ )
    {
        /* A range needs a lower bound before the '-' and an upper bound
           after it; otherwise the '-' is an ordinary character.  Checking
           pattern[1] also keeps us from walking past the terminator. */
        if ( (*pattern == '-') && (pattern != prev) && pattern[1] )
        {
            char low = *prev;
            char high;

            pattern ++;
            if ( (*pattern == '\\') && pattern[1] ) pattern ++;
            high = *pattern;

            if ( low > high )
            {
                char swap = low;
                low = high;
                high = swap;
            }

            if ( (low <= c) && (c <= high) ) return match_ok;

            /* The upper bound has been consumed as data: do not let the
               escape test below look at it a second time. */
            continue;
        }

        /* Quoted character; a trailing backslash stands for itself. */
        if ( (*pattern == '\\') && pattern[1] ) pattern ++;

        if ( *pattern == c ) return match_ok;
    }

    return ! match_ok;
}
/***
 *  Function    :  match   (internal)
 *
 *  Description :  Returns the number of leading characters of a string
 *                 matched by the first one-character element of a
 *                 regular expression, repeated as often as possible.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .   any character (consumes the whole string)
 *                 \   quote next character
 *                 []  set of characters (see matchset)
 *
 *                 A '[' without a closing ']' and a '\' at the very end
 *                 of the pattern are taken as ordinary characters.
 *                 Inside a set, "\]" does not close the set and "\\" is
 *                 a quoted backslash.
 *
 *  Parameters  :  in      char *string    input string to be matched
 *                 in/out  char *pattern   regular expression to match
 *
 *  Side-effects:  pattern contents will be destroyed: a NUL is written
 *                 over the last character of the element just handled,
 *                 so the caller can find what follows by skipping to
 *                 the first NUL.
 *
 *  Return      :  number of characters matched by the element
 *                 0 if not matched
 *
 *  OS/Compiler :  All
 ***/
static int near match( const char *string, char *pattern )
{
    char *ptr;
    int length = 0;

    switch ( *pattern )
    {
    case '.' :
        *pattern = '\0';
        return (int) strlen( string );

    case '[' :
        /* Look for the closing bracket, stepping over quoted pairs so
           that "\]" stays inside the set and "\\" cannot hide the ']'
           that follows it. */
        for ( ptr = pattern + 1; *ptr && (*ptr != ']'); ptr ++ )
            if ( (*ptr == '\\') && ptr[1] ) ptr ++;

        if ( *ptr )
        {
            *ptr = '\0';
            /* matchset() rejects NUL, so this stops at end of string. */
            while ( matchset(*string++, pattern + 1) ) length ++;
            return length;
        }
        /* No closing bracket: handle '[' as a literal below, so that
           the end marker lands inside the pattern buffer. */
        break;

    case '\\':
        /* Quote the next character; a trailing '\' stands for itself. */
        if ( pattern[1] ) pattern ++;
        break;

    default :
        break;
    }

    /* Literal character.  The *pattern test keeps an empty element from
       "matching" the string terminator and running past it. */
    while ( *pattern && (*string++ == *pattern) ) length ++;
    *pattern = '\0';

    return length;
}
/***
 *  Function    :  recursexp
 *
 *  Description :  Returns the number of leading characters of a string
 *                 matched by a regular expression.  Works recursively:
 *                 the first element is matched as far as possible, then
 *                 the rest of the expression is tried on what remains,
 *                 giving characters back one at a time on failure.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .   any character         \  quote next character
 *                 *   match zero or more    +  match one or more
 *                 []  set of characters
 *
 *                 An element without '*' or '+' must match exactly one
 *                 character.
 *
 *  Parameters  :  in   char *string    input string to be matched
 *                 in   char *pattern   regular expression to match
 *                                      (not modified; a private copy
 *                                      is made for each call)
 *
 *  Return      :  number of characters matched by regular expression
 *                 (0 for an empty pattern)
 *                 -1 if not matched, or if the copy cannot be allocated
 *
 *  OS/Compiler :  All
 ***/
int recursexp( const char *string, char *pattern )
{
    int count1;                 /* characters taken by the first element  */
    int count2 = -1;            /* characters taken by the remainder      */
    int minone = 1;             /* fewest repetitions of the first element */
    char *copy, *rest, *limit;

    if ( ! *pattern ) return 0;

    /* match() overwrites its pattern, so work on a duplicate. */
    copy = strdup( pattern );
    if ( ! copy ) return -1;
    limit = copy + strlen( copy );          /* original terminator */

    count1 = match( string, copy );

    /* match() marked the end of the first element with a NUL; the rest
       of the expression starts right after it.  Never step beyond the
       original terminator, whatever match() left behind. */
    rest = copy;
    while ( (rest < limit) && *rest ) rest ++;
    if ( rest < limit ) rest ++;

    switch ( *rest )
    {
    case '\0':                  /* last element, no repeat symbol */
        free( copy );
        return count1 ? 1 : -1;

    case '*':                   /* zero or more */
        minone = 0;
        rest ++;
        break;

    case '+':                   /* one or more */
        rest ++;
        break;

    default :                   /* exactly one */
        if ( count1 > 1 ) count1 = 1;
        break;
    }

    /* Longest first; give back one character at a time, but never go
       below the minimum the element requires. */
    for ( ; count1 >= minone; count1 -- )
    {
        count2 = recursexp( string + count1, rest );
        if ( count2 >= 0 ) break;
    }

    free( copy );

    if ( count2 < 0 ) return -1;
    return (count1 + count2);
}
/***
 *  Function    :  regexp
 *
 *  Description :  Searches a string for the first part matched by a
 *                 regular expression and returns that part.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 ^   start of line         $  end of line
 *                 .   any character         \  quote next character
 *                 *   match zero or more    +  match one or more
 *                 []  set of characters
 *
 *                 ex:  [aeiou0-9]   match a, e, i, o, u, and 0 thru 9
 *                      [^aeiou0-9]  match anything but a, e, i, o, u,
 *                                   and 0 thru 9
 *
 *                 A final '$' is an anchor unless it is quoted, i.e.
 *                 preceded by an odd number of backslashes.  With '$'
 *                 the search goes on until a match reaching the end of
 *                 the string is found.
 *
 *  Parameters  :  out  char *outstr    resulting string (may be NULL)
 *                 in   char *string    input string in which we search
 *                 in   char *pattern   regular expression to match
 *
 *  Return      :  - pointer to resulting string (filled through
 *                   strleft(); zero characters are requested when
 *                   nothing matched)
 *                 - if ( outstr == NULL ) returns pointer to matched
 *                   string inside 'string'.  When nothing matched this
 *                   is where the search stopped: the end of 'string',
 *                   or its start for a '^' pattern.
 *
 *  OS/Compiler :  All
 ***/
char *regexp( char *outstr, const char *string, const char *pattern )
{
    char *copy, *expr;
    size_t length, slashes;
    int count = -1, begin = 0, end = 0;

    copy = strdup( pattern );
    if ( copy )                 /* on allocation failure: report no match */
    {
        expr = copy;
        length = strlen( copy );

        if ( length && (copy[length - 1] == '$') )      /* Match end of line */
        {
            /* Count the backslashes just before the '$': an even number
               means they quote each other and the '$' is an anchor. */
            for ( slashes = 0;
                  (slashes + 1 < length) && (copy[length - 2 - slashes] == '\\');
                  slashes ++ );

            if ( (slashes % 2) == 0 )
            {
                end = 1;
                copy[length - 1] = '\0';
            }
        }

        if ( *expr == '^' )                             /* Match begin of line */
        {
            begin = 1;
            expr ++;
        }

        for ( ; *string; string ++ )
        {
            count = recursexp( string, expr );

            /* With '$', only a match reaching the end of line counts. */
            if ( (count >= 0) && end && ((size_t) count != strlen(string)) )
                count = -1;

            /* With '^', only the first position may be tried. */
            if ( (count >= 0) || begin ) break;
        }

        free( copy );
    }

    if ( count < 0 ) count = 0;     /* nothing matched: empty result */

    if ( outstr )
    {
        /* strleft() is defined elsewhere -- presumably it copies the
           leftmost 'count' characters of 'string' into 'outstr'. */
        strleft( outstr, string, count );
        return outstr;
    }

    return (char *)string;
}
#ifdef TEST
#include <stdio.h>
#include <stdlib.h>
/* Reads one line from stdin into buf (at most size - 1 characters) and
   removes the trailing newline.  Returns 0 at end of input, 1 otherwise. */
static int read_line( char *buf, int size )
{
    char *p;

    if ( ! fgets( buf, size, stdin ) ) return 0;

    for ( p = buf; *p && (*p != '\n'); p ++ );
    *p = '\0';

    return 1;
}

/* Test driver: runs one fixed example, then matches string/pattern pairs
   typed by the user until end of input. */
int main( void )
{
    char string[255], pattern[255], result[255];

    regexp( result, "I123", "[^A-Z\\-^][0-9]+\\.*" );
    printf( "\n Result : %s\n\n", result );

    for (;;)
    {
        printf( "\n String : " );
        if ( ! read_line( string, (int) sizeof string ) ) break;

        printf( " Pattern: " );
        if ( ! read_line( pattern, (int) sizeof pattern ) ) break;

        regexp( result, string, pattern );
        printf( "\n Result : %s\n\n", result );
    }

    return 0;
}
#endif